log using subs_panel_v9.log, text replace

*Import the Orbis excel data files for 2008-2016.

foreach x in JP_subs US_subs JP_parents_annualreport US_parents_annualreport {

import excel using `x'_2017_v3_copy, sheet("Results") cellrange(B1) firstrow clear
rename Companyname name
rename Country country
rename CountryISOCode ccode
rename BvDIDnumber bvdid
rename Dateofincorporation inc_date
rename Typeofentity firm_type
rename Reportingbasis reporting_basis
rename Conscode conscode
rename Filingtype filing_type
rename ListedDelistedUnlisted listed
rename Latestannualaccountclosingda last_closingdate
rename Latestinterimaccountclosingd last_interim_closingdate
rename Lastavailyear lastavail_year
rename Accountingtemplate template
rename NACERev2mainsection industry
rename NACERev2Corecode4digits ind_code
rename BvDIndepIndic indep_index
rename GUOName guo_name
rename GUOBvDIDnumber guo_bvdid
rename GUOCountryISOcode guo_ccode
rename GUOType guo_firm_type
rename GUONACECorecode guo_indcode
rename GUONACEtextdescription guo_industry
rename GUODirect guo_direct
rename GUOTotal guo_total
rename Traderegisternumber trade_register_number
rename VATTaxnumber VAT_tax_number
rename EuropeanVATnumber EU_VAT_number
rename ISINnumber ISIN_number

forvalues i = 2008/2016 {
rename FixedassetsthUSD`i' fixed_assets`i'
}

rename IntangiblefixedassetsthUSD2 intan_assets2016
rename AO intan_assets2015
rename AP intan_assets2014
rename AQ intan_assets2013
rename AR intan_assets2012
rename AS intan_assets2011
rename AT intan_assets2010
rename AU intan_assets2009
rename AV intan_assets2008

rename TangiblefixedassetsthUSD201 tangible_assets2016
rename AX tangible_assets2015
rename AY tangible_assets2014
rename AZ tangible_assets2013
rename BA tangible_assets2012
rename BB tangible_assets2011
rename BC tangible_assets2010
rename TangiblefixedassetsthUSD200 tangible_assets2009
rename BE tangible_assets2008

forvalues i = 2008/2016 {
rename OtherfixedassetsthUSD`i' otherfixed_assets`i'
}

forvalues i = 2008/2016 {
rename TotalassetsthUSD`i' total_assets`i'
}

rename NoncurrentliabilitiesthUSD2 noncur_liab2016
rename BY noncur_liab2015
rename BZ noncur_liab2014
rename CA noncur_liab2013
rename CB noncur_liab2012
rename CC noncur_liab2011
rename CD noncur_liab2010
rename CE noncur_liab2009
rename CF noncur_liab2008

forvalues i = 2008/2016 {
rename CurrentliabilitiesthUSD`i' cur_liab`i'
}

forvalues i = 2008/2016 {
rename Numberofemployees`i' num_employees`i'
}

forvalues i = 2008/2016 {
rename SalesthUSD`i' sales`i'
}

rename OperatingPLEBITthUSD201 ebit2016
rename DI ebit2015
rename DJ ebit2014
rename DK ebit2013
rename DL ebit2012
rename DM ebit2011
rename DN ebit2010
rename OperatingPLEBITthUSD200 ebit2009
rename DP ebit2008

forvalues i = 2008/2016 {
rename PLbeforetaxthUSD`i' pretax_prof`i'
}

forvalues i = 2008/2016 {
rename TaxationthUSD`i' taxation`i'
}

forvalues i = 2008/2016 {
rename PLaftertaxthUSD`i' aftertax_prof`i'
}

rename PLforperiodNetincometh netincome2016
rename ES netincome2015
rename ET netincome2014
rename EU netincome2013
rename EV netincome2012
rename EW netincome2011
rename EX netincome2010
rename EY netincome2009
rename EZ netincome2008

forvalues i = 2008/2016 {
rename CostsofemployeesthUSD`i' costs_employees`i'
}

forvalues i = 2008/2016 {
rename Closingdate`i' closingdate`i'
}

*Check and delete subsidiaries with duplicate BvD ID.
duplicates report bvdid
duplicates tag bvdid, generate(dup)
drop if dup>0 & dup!=.
drop dup

display "Summary of `x'_2008-2016"
describe
summarize

save `x'_2008-2016, replace
}

*Convert dates stored as string to Stata dates for the closingdate variables (this is needed to append the US data to the JP data at the end of this file).
use JP_subs_2008-2016, clear
foreach x in 2016 2015 2014 2013 2012 2011 2010 2009 {
generate closingdate`x'_temp=date(closingdate`x', "DMY")
format closingdate`x'_temp %td
drop closingdate`x'
rename closingdate`x'_temp closingdate`x'
label variable closingdate`x' "Closing date`x'"
}
save JP_subs_2008-2016, replace

use US_subs_2008-2016, clear
foreach x in 2016 2015 2014 2013 2012 2011 2010 2009 2008 {
generate closingdate`x'_temp=date(closingdate`x', "DMY")
format closingdate`x'_temp %td
drop closingdate`x'
rename closingdate`x'_temp closingdate`x'
label variable closingdate`x' "Closing date`x'"
}
save US_subs_2008-2016, replace

foreach x in JP US {

use `x'_parents_annualreport_2008-2016, clear
display "Keep only `x' parents with consolidated accounts."
tabulate conscode
keep if conscode=="C1" | conscode=="C2"

keep bvdid conscode fixed_assets2016-costs_employees2008 closingdate*
foreach var of varlist bvdid conscode fixed_assets2016-costs_employees2008 closingdate* {
rename `var' guo_`var'
}

save `x'_parents_2008-2016_merge, replace

use `x'_subs_2008-2016, clear
display "Keep only subsidiaries with guo_ccode=`x'."
count if guo_ccode==""
tabulate guo_ccode
keep if guo_ccode=="`x'"

rename name name_2017
rename ccode ccode_2017
rename inc_date inc_date_2017
rename guo_ccode guo_ccode_2017

display "Keep only `x'-owned subsidiaries with unconsolidated accounts."
tabulate conscode
keep if conscode=="U1" | conscode=="U2"

*Merge the parent data into the subsidiary data.
merge m:1 guo_bvdid using `x'_parents_2008-2016_merge
drop if _merge==2

*Create the dummy variable indicating whether the parent's financial information is available (when using the 2017 version of Orbis).
generate parent_info_2017=0
replace parent_info_2017=1 if _merge==3
label variable parent_info_2017 "The parent's financial information is available in the 2017 version of Orbis."
drop _merge

save `x'_subs_parents_2008-2016, replace
}

*Import the Orbis excel data files for 2004-2012.

foreach x in JP_subs US_subs JP_parents_annualreport US_parents_annualreport {

import excel using `x'_2013_v4_copy, sheet("Results") cellrange(B1) firstrow clear
rename Companyname name
rename Country country
rename CountryISOCode ccode
rename BvDIDnumber bvdid
rename Dateofincorporation inc_date
rename Typeofentity firm_type
rename Reportingbasis reporting_basis
rename Conscode conscode
rename Filingtype filing_type
rename ListedDelistedUnlisted listed
rename Latestannualaccountclosingda last_closingdate
rename Latestinterimaccountclosingd last_interim_closingdate
rename Lastavailyear lastavail_year
rename Accountingtemplate template
rename NACERev2mainsection industry
rename NACERev2Corecode4digits ind_code
rename BvDIndepIndic indep_index
rename GUOName guo_name
rename GUOBvDIDnumber guo_bvdid
rename GUOCountryISOcode guo_ccode
rename GUOType guo_firm_type
rename GUONACERev2Corecode guo_indcode // Different from the 2017 version (GUONACECorecode)
rename GUONACERev2textdescript guo_industry // Different from the 2017 version (GUONACEtextdescription)
rename GUODirect guo_direct
rename GUOTotal guo_total
rename Traderegisternumber trade_register_number
rename VATTaxnumber VAT_tax_number
rename EuropeanVATnumber EU_VAT_number
rename ISINnumber ISIN_number

forvalues i = 2004/2012 {
rename FixedassetsthUSD`i' fixed_assets`i'
}

rename IntangiblefixedassetsthUSD2 intan_assets2012
rename AO intan_assets2011
rename AP intan_assets2010
rename AQ intan_assets2009
rename AR intan_assets2008
rename AS intan_assets2007
rename AT intan_assets2006
rename AU intan_assets2005
rename AV intan_assets2004

rename TangiblefixedassetsthUSD201 tangible_assets2012
rename AX tangible_assets2011
rename AY tangible_assets2010
rename TangiblefixedassetsthUSD200 tangible_assets2009
rename BA tangible_assets2008
rename BB tangible_assets2007
rename BC tangible_assets2006
rename BD tangible_assets2005
rename BE tangible_assets2004

forvalues i = 2004/2012 {
rename OtherfixedassetsthUSD`i' otherfixed_assets`i'
}

forvalues i = 2004/2012 {
rename TotalassetsthUSD`i' total_assets`i'
}

rename NoncurrentliabilitiesthUSD2 noncur_liab2012
rename BY noncur_liab2011
rename BZ noncur_liab2010
rename CA noncur_liab2009
rename CB noncur_liab2008
rename CC noncur_liab2007
rename CD noncur_liab2006
rename CE noncur_liab2005
rename CF noncur_liab2004

forvalues i = 2004/2012 {
rename CurrentliabilitiesthUSD`i' cur_liab`i'
}

forvalues i = 2004/2012 {
rename Numberofemployees`i' num_employees`i'
}

forvalues i = 2004/2012 {
rename SalesthUSD`i' sales`i'
}

rename OperatingPLEBITthUSD201 ebit2012
rename DI ebit2011
rename DJ ebit2010
rename OperatingPLEBITthUSD200 ebit2009
rename DL ebit2008
rename DM ebit2007
rename DN ebit2006
rename DO ebit2005
rename DP ebit2004

forvalues i = 2004/2012 {
rename PLbeforetaxthUSD`i' pretax_prof`i'
}

forvalues i = 2004/2012 {
rename TaxationthUSD`i' taxation`i'
}

forvalues i = 2004/2012 {
rename PLaftertaxthUSD`i' aftertax_prof`i'
}

rename PLforperiodNetincometh netincome2012
rename ES netincome2011
rename ET netincome2010
rename EU netincome2009
rename EV netincome2008
rename EW netincome2007
rename EX netincome2006
rename EY netincome2005
rename EZ netincome2004

forvalues i = 2004/2012 {
rename CostsofemployeesthUSD`i' costs_employees`i'
}

forvalues i = 2004/2012 {
rename Closingdate`i' closingdate`i'
}

*Check and delete subsidiaries with duplicate BvD ID.
duplicates report bvdid
duplicates tag bvdid, generate(dup)
drop if dup>0 & dup!=.
drop dup

display "Summary of `x'_2004-2012"
describe
summarize

save `x'_2004-2012, replace
}

*Convert dates stored as string to Stata dates for the closingdate variables (this is needed to append the US data to the JP data at the end of this file).
use JP_subs_2004-2012, clear
foreach x in 2012 2011 2010 2009 2008 2007 2006 2005 2004 {
generate closingdate`x'_temp=date(closingdate`x', "DMY")
format closingdate`x'_temp %td
drop closingdate`x'
rename closingdate`x'_temp closingdate`x'
label variable closingdate`x' "Closing date`x'"
}
save JP_subs_2004-2012, replace

use US_subs_2004-2012, clear
foreach x in 2012 2011 2010 2009 2008 2007 2006 2005 2004 {
generate closingdate`x'_temp=date(closingdate`x', "DMY")
format closingdate`x'_temp %td
drop closingdate`x'
rename closingdate`x'_temp closingdate`x'
label variable closingdate`x' "Closing date`x'"
}
save US_subs_2004-2012, replace

foreach x in JP US {
use `x'_parents_annualreport_2004-2012, clear
display "Keep only `x' parents with consolidated accounts."
tabulate conscode
keep if conscode=="C1" | conscode=="C2"

keep bvdid conscode fixed_assets2012-costs_employees2004 closingdate*
foreach var of varlist bvdid conscode fixed_assets2012-costs_employees2004 closingdate* {
rename `var' guo_`var'
}

save `x'_parents_2004-2012_merge, replace

use `x'_subs_2004-2012, clear
display "Keep only subsidiaries with guo_ccode=`x'."
count if guo_ccode==""
tabulate guo_ccode
keep if guo_ccode=="`x'"

display "Keep only `x'-owned subsidiaries with unconsolidated accounts."
tabulate conscode
keep if conscode=="U1" | conscode=="U2"

*Merge the parent data into the subsidiary data.
merge m:1 guo_bvdid using `x'_parents_2004-2012_merge
drop if _merge==2

*Create a dummy variable that indicates whether the parent's financial information is available (when using the 2013 version of Orbis).
generate parent_info_2013=0
replace parent_info_2013=1 if _merge==3
label variable parent_info_2013 "The parent's financial information is available in the 2013 version of Orbis."
drop _merge

save `x'_subs_parents_2004-2012, replace
}

foreach x in JP US {

display "Merge `x'_subs_parents_2004-2012 and `x'_subs_parents_2008-2016 to construct panel data for `x'-owned foreign subsidiaries for 2004-2016."
use `x'_subs_parents_2004-2012, clear
merge 1:1 bvdid using `x'_subs_parents_2008-2016, keepusing(name_2017 ccode_2017 inc_date_2017 guo_ccode_2017 fixed_assets2016-parent_info_2017) update

*Drop the subsidiaries that exist only in the 2017 version of Orbis.
drop if _merge==2

generate exist_2013=0
replace exist_2013=1 if _merge==1
label variable exist_2013 "Exists only in the 2013 version of Orbis."
drop _merge
save `x'_subs_parents_2004-2016_main, replace

*Merge the two datasets using EU_VAT_number, VAT_tax_number, trade_register_number, & ISIN_number as keys.
use `x'_subs_parents_2004-2012, clear
merge 1:1 bvdid using `x'_subs_parents_2008-2016, keepusing(name_2017 ccode_2017 inc_date_2017 guo_ccode_2017 fixed_assets2016-parent_info_2017) update
keep if _merge==1
drop name_2017 ccode_2017 inc_date_2017 guo_ccode_2017 fixed_assets2016-parent_info_2017 _merge
save `x'_subs_parents_2004-2012_NotMatched, replace

foreach id in EU_VAT_number VAT_tax_number trade_register_number ISIN_number {
use `x'_subs_parents_2008-2016, clear
duplicates report `id'
duplicates tag `id', generate(dup)
drop if dup>0 & dup!=.
drop dup

save `x'_subs_parents_2008-2016_`id', replace

use `x'_subs_parents_2004-2012_NotMatched, clear
duplicates report `id'
duplicates tag `id', generate(dup)
drop if dup>0 & dup!=.
drop dup
merge 1:1 `id' using `x'_subs_parents_2008-2016_`id', keepusing(name_2017 ccode_2017 inc_date_2017 guo_ccode_2017 fixed_assets2016-parent_info_2017) update

drop if _merge==1 | _merge==2
drop _merge

save `x'_subs_matched_`id', replace
}

use `x'_subs_matched_EU_VAT_number, clear
merge 1:1 bvdid using `x'_subs_matched_VAT_tax_number, nogenerate
merge 1:1 bvdid using `x'_subs_matched_trade_register_number, nogenerate
merge 1:1 bvdid using `x'_subs_matched_ISIN_number, nogenerate

display "The number of `x'-owned subsidiaries merged using other ID numbers as keys"
count

generate merge_IDs=1
label variable merge_IDs "Merged using other ID numbers as keys."
generate exist_2013=0
label variable exist_2013 "Exists only in the 2013 version of Orbis."
save `x'_subs_parents_2004-2016_add, replace

merge 1:1 bvdid using `x'_subs_parents_2004-2016_main
drop _merge
save `x'_subs_parents_2004-2016_v3, replace
}

*Append the US data to the JP data.
use JP_subs_parents_2004-2016_v3, clear
append using US_subs_parents_2004-2016_v3

*Drop the subsidiaries located in different countries between the two versions of Orbis.
count if ccode!=ccode_2017 & ccode!="" & ccode_2017!=""
list name inc_date ccode name_2017 inc_date_2017 ccode_2017 if ccode!=ccode_2017 & ccode!="" & ccode_2017!=""
drop if ccode!=ccode_2017 & ccode!="" & ccode_2017!=""

*Check that the GUO's country code is the same in the two versions of Orbis.
count if guo_ccode!=guo_ccode_2017 & guo_ccode_2017!=""
tabulate guo_ccode guo_ccode_2017 if guo_ccode_2017!=""

save JPUS_subs_parents_2004-2016_v3, replace

log close